<?php
/**
 * Created by PhpStorm.
 * User: tao
 * Date: 10/27/2017
 * Time: 3:26 PM
 */
// Default response charset, sent as soon as this file is loaded. The actions below that
// call header() with charset=gbk replace it, since header() overwrites an earlier header
// of the same name by default.
header("Content-type: text/html; charset=utf-8");
/**
 * Scrapes the 2016 administrative-division code pages published on stats.gov.cn
 * and stores them as Region records.
 *
 * The preview actions (index, index1, index3, index5) fetch one fixed page each and
 * dump the regex matches. actionIndex4 stores the towns of one fixed county, and
 * actionIndex6 walks the whole hierarchy (province > city > county > town > village).
 *
 * The fetched pages are treated as GBK: names are converted to UTF-8 before saving,
 * and the scraping actions declare charset=gbk on the response.
 */
class SpiderController extends Controller
{
    /** Root URL of the 2016 division-code pages; every page URL is built from it. */
    const BASE_URL = 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2016/';

    /** Code used as parent_code of every province (the country-level record). */
    const ROOT_CODE = '100000';

    /**
     * Zero-based index into the province list at which actionIndex6 starts;
     * provinces before it are skipped.
     * NOTE(review): looks like a leftover from resuming an interrupted crawl -
     * confirm, and set to 0 for a full crawl.
     */
    const RESUME_PROVINCE_INDEX = 23;

    /** Province links on the index page. Groups: 1 = page name without ".html", 2 = name. */
    const PROVINCE_PATTERN = '/<td><a href=\'([^<>]+)\.html\'>([^<>]+)<br\/><\/a><\/td>/';

    /** Village rows. Groups: 1 = code, 2 = second column (stored as town_code), 3 = name. */
    const VILLAGE_PATTERN = '/<tr class=\'villagetr\'><td>([^<>]+)<\/td><td>([^<>]+)<\/td><td>([^<>]+)<\/td><\/tr>/';

    /**
     * Declares class-based actions.
     *
     * NOTE(review): neither 'captcha' nor 'page' appears in the allow list of
     * accessRules(), so the final deny rule blocks both - confirm whether they
     * are still needed on this controller.
     *
     * @return array action configurations keyed by action ID
     */
    public function actions()
    {
        return array(
            // renders a CAPTCHA image
            'captcha'=>array(
                'class'=>'CCaptchaAction',
                'backColor'=>0xFFFFFF,
            ),
            // renders "static" view pages
            'page'=>array(
                'class'=>'CViewAction',
            ),
        );
    }

    /**
     * @return array action filters
     */
    public function filters()
    {
        return array(
            'accessControl', // apply accessRules() to every action
        );
    }

    /**
     * Specifies the access control rules.
     * This method is used by the 'accessControl' filter.
     *
     * NOTE(review): the scraping actions, including the ones that write to the
     * database, are open to every user ('*') - confirm this is intended.
     *
     * @return array access control rules
     */
    public function accessRules()
    {
        return array(
            array(
                'allow', // any user may run the scraping actions listed here
                'actions'=>array(
                    'index',
                    'index1',
                    'index2',
                    'index3',
                    'index4',
                    'index5',
                    'index6',
                ),
                'users'=>array('*'),
            ),
            array('deny',  // everything else is denied for all users
                'users'=>array('*'),
            ),
        );
    }

    /**
     * Country level: dumps the province links found on the index page.
     */
    public function actionIndex()
    {
        header("Content-type: text/html; charset=gbk");

        $matches = $this->fetchMatches(new Snoopy, self::BASE_URL . 'index.html', self::PROVINCE_PATTERN);
        StringUtil::out($matches);
    }

    /**
     * Province level: dumps the city rows of the fixed province page 11.html.
     */
    public function actionIndex1()
    {
        header("Content-type: text/html; charset=gbk");

        $matches = $this->fetchMatches(new Snoopy, self::BASE_URL . '11.html', $this->linkedRowPattern('citytr'));
        StringUtil::out($matches);
    }

    /**
     * Scratch action: prints "i:j" for i in 5..9 and j in 0..9. It does no scraping.
     */
    public function actionIndex2()
    {
        for ($i = 5; $i < 10; $i++) {
            for ($j = 0; $j < 10; $j++) {
                echo $i.':'.$j.'<br/>';
            }
        }
    }

    /**
     * City level: dumps the county rows of the fixed city page 11/1101.html.
     */
    public function actionIndex3()
    {
        header("Content-type: text/html; charset=gbk");

        $matches = $this->fetchMatches(new Snoopy, self::BASE_URL . '11/1101.html', $this->linkedRowPattern('countytr'));
        StringUtil::out($matches);
    }

    /**
     * County level: dumps the town rows of the fixed county page 52/03/520324.html
     * and stores each town as a level-4 Region under the placeholder parent '000000'.
     */
    public function actionIndex4()
    {
        header("Content-type: text/html; charset=gbk");

        $towns = $this->fetchMatches(new Snoopy, self::BASE_URL . '52/03/520324.html', $this->linkedRowPattern('towntr'));
        StringUtil::out($towns);

        foreach ($towns as $town) {
            $this->saveRegion(substr($town[2], 0, 9), $this->townName($town), '000000', 4);
        }
    }

    /**
     * Town level (sub-districts, towns, townships, farms): dumps the village rows
     * of the fixed town page 52/23/25/522325104.html.
     */
    public function actionIndex5()
    {
        header("Content-type: text/html; charset=gbk");

        $matches = $this->fetchMatches(new Snoopy, self::BASE_URL . '52/23/25/522325104.html', self::VILLAGE_PATTERN);
        StringUtil::out($matches);
    }

    /**
     * Full crawl: walks province > city > county > town > village, saving one Region
     * per row, and echoes the running row count after every village.
     *
     * Starts at RESUME_PROVINCE_INDEX in the province list. The counter includes
     * every row handed to saveRegion(), whether or not the save succeeded; failed
     * saves are reported by saveRegion() through error_log().
     *
     * NOTE(review): this issues one HTTP request per region page and may exceed the
     * configured execution time limit - confirm how it is meant to be run.
     */
    public function actionIndex6()
    {
        header("Content-type: text/html; charset=gbk");

        // One client is shared by every request of the crawl.
        $snoopy = new Snoopy;

        // The row patterns do not change during the crawl, so build them once.
        $cityPattern = $this->linkedRowPattern('citytr');
        $countyPattern = $this->linkedRowPattern('countytr');
        $townPattern = $this->linkedRowPattern('towntr');

        $provinces = $this->fetchMatches($snoopy, self::BASE_URL . 'index.html', self::PROVINCE_PATTERN);
        $sum = 0;

        foreach (array_slice($provinces, self::RESUME_PROVINCE_INDEX) as $province) {
            // Group 1 is the page name without ".html"; it also names the province's
            // sub-directory and is the prefix of the six-digit province code.
            $provinceDir = $province[1];
            $provinceCode = $provinceDir . '0000';
            $this->saveRegion($provinceCode, $province[2], self::ROOT_CODE, 1);
            $sum++;

            $cities = $this->fetchMatches($snoopy, self::BASE_URL . $provinceDir . '.html', $cityPattern);
            foreach ($cities as $city) {
                $cityCode = substr($city[2], 0, 6);
                $this->saveRegion($cityCode, $city[4], $provinceCode, 2);
                $sum++;

                // City hrefs are joined directly onto the base URL.
                $counties = $this->fetchMatches($snoopy, self::BASE_URL . $city[1], $countyPattern);
                foreach ($counties as $county) {
                    $countyCode = substr($county[2], 0, 6);
                    $this->saveRegion($countyCode, $county[4], $cityCode, 3);
                    $sum++;

                    // County hrefs are joined under the province directory.
                    $towns = $this->fetchMatches($snoopy, self::BASE_URL . $provinceDir . '/' . $county[1], $townPattern);
                    // Digits 3-4 of the county code name the directory that town hrefs are joined under.
                    $cityDir = substr($county[2], 2, 2);
                    foreach ($towns as $town) {
                        $townCode = substr($town[2], 0, 9);
                        $this->saveRegion($townCode, $this->townName($town), $countyCode, 4);
                        $sum++;

                        $villageUrl = self::BASE_URL . $provinceDir . '/' . $cityDir . '/' . $town[1];
                        $villages = $this->fetchMatches($snoopy, $villageUrl, self::VILLAGE_PATTERN);
                        foreach ($villages as $village) {
                            $this->saveRegion($village[1], $village[3], $townCode, 5, $village[2]);
                            $sum++;
                            echo $sum.'<br/>';
                        }
                    }
                }
            }
        }
    }

    /**
     * Builds the regex for table rows whose two cells are both links.
     * Groups: 1 = href, 2 = code, 3 = href again, 4 = name.
     *
     * @param string $rowClass value of the row's class attribute ('citytr', 'countytr' or 'towntr')
     * @return string PCRE pattern
     */
    private function linkedRowPattern($rowClass)
    {
        return '/<tr class=\'' . $rowClass . '\'><td><a href=\'([^<>]+)\'>([^<>]+)<\/a><\/td><td><a href=\'([^<>]+)\'>([^<>]+)<\/a><\/td><\/tr>/';
    }

    /**
     * Fetches a page and returns every match of $pattern in it.
     *
     * A fetch that reports failure yields no rows. Without this check a failed
     * request would re-parse whatever the client still holds in ->results
     * (presumably the previously fetched page - confirm against the Snoopy
     * version in use), storing that page's rows under the wrong parent.
     *
     * @param Snoopy $snoopy  HTTP client used for the request
     * @param string $url     absolute page URL
     * @param string $pattern PCRE pattern applied to the page body
     * @return array match sets in PREG_SET_ORDER layout; empty when nothing matched
     */
    private function fetchMatches($snoopy, $url, $pattern)
    {
        if ($snoopy->fetch($url) === false) {
            return array();
        }

        $matches = array();
        preg_match_all($pattern, $snoopy->results, $matches, PREG_SET_ORDER);

        return $matches;
    }

    /**
     * Returns the name to store for a town row.
     *
     * The town with code 520324116000 is stored under the ASCII name 'fuyanzhen'
     * instead of the name found on the page.
     * NOTE(review): presumably its GBK name does not convert cleanly - confirm.
     *
     * @param array $row one match set produced by the 'towntr' pattern
     * @return string town name, still in the page's encoding
     */
    private function townName(array $row)
    {
        return $row[2] === '520324116000' ? 'fuyanzhen' : $row[4];
    }

    /**
     * Creates and saves one Region record.
     *
     * Code and name are converted from GBK to UTF-8; the remaining values are
     * stored as given. town_code is assigned only when a value is supplied.
     *
     * @param string      $code       region code as scraped
     * @param string      $name       region name as scraped
     * @param string      $parentCode code of the parent region
     * @param int         $level      depth in the hierarchy (1 = province ... 5 = village)
     * @param string|null $townCode   optional value for the town_code attribute
     * @return mixed whatever Region::save() returns
     */
    private function saveRegion($code, $name, $parentCode, $level, $townCode = null)
    {
        $model = new Region();
        $model->code = iconv("gbk", "UTF-8//IGNORE", $code);
        $model->name = iconv("gbk", "UTF-8//IGNORE", $name);
        $model->parent_code = $parentCode;
        $model->level = $level;
        if ($townCode !== null) {
            $model->town_code = $townCode;
        }

        $saved = $model->save();
        if ($saved === false) {
            // Make failed rows visible without changing the page output.
            error_log('SpiderController: could not save region ' . $code . ' (level ' . $level . ')');
        }

        return $saved;
    }
}